################################################
## Descriptive Tables/Figures ##################
################################################

pacman::p_load(
    tidyverse, # tidyverse
    panelr, # panel data analysis
    here, # computational reproducibility
    glue, # gluing objects and strings
    tidylog, # logging analysis
    naniar, # missing data
    readxl,
    ggpubr,
    broom,
    patchwork,
    plm,
    broom.mixed,
    estimatr,
    DeclareDesign,
    sensemakr,
    mice,
    testthat,
    modelsummary,
    flextable,
    psych
)
source(here("utils.r"))
source(here("theme.R"))

ggplot2::theme_set(theme_bw())


#load files
df <- read_csv(here("panel_data_jul24_2.csv"))

## Income Rescale
df$income <- scales::rescale(df$income, to = c(0, 1))

## Figure 1
prop.table(table(df$gendiscrim, df$wave),2)
prop.table(table(df$apa.discrim.rona, df$wave) ,2)

## Figure 3
dt <- read_csv(here("newtbl2.csv"))

plot_w123 <- ggplot(dt, aes(
  x = factor(proxy), y = mean,
  ymax = dt$mean + dt$ci_high,
  ymin = dt$mean - dt$ci_low,
  col = factor(group)
)) +
  geom_pointrange() +
  labs(
    x = "Response to general discrimination Q in W1",
    y = "Average response to COVID-19 discrimination Q in W2 & W3",
    col = "Wave"
  ) +
  geom_pointrange(aes(ymin=mean-sd, ymax=mean+sd)) +
  geom_hline(yintercept = 0, linetype = "dotted", col = "black", size = 1) +
  ylim(c(1, 5))  + scale_color_grey()

plot_w123

ggsave(here("fig3.png"),
       height = 6,
       width = 6)

#mean and sd of general discrimination across waves 
table(df$gendiscrim) 
df$m.gen <- NA 
df$m.gen[df$gendiscrim==0] <- 1
df$m.gen[df$gendiscrim==0.25] <- 2
df$m.gen[df$gendiscrim==0.5] <- 3
df$m.gen[df$gendiscrim==0.75] <- 4
df$m.gen[df$gendiscrim==1] <- 5

tapply(df$m.gen, df$wave, mean, na.rm=T)
tapply(df$m.gen, df$wave, sd, na.rm=T)

#mean and sd of covid discrim
table(df$apa.discrim.rona)
df$m.rona <- NA
df$m.rona[df$apa.discrim.rona==0] <- 1
df$m.rona[df$apa.discrim.rona==0.25] <- 2
df$m.rona[df$apa.discrim.rona==0.5] <- 3
df$m.rona[df$apa.discrim.rona==0.75] <- 4
df$m.rona[df$apa.discrim.rona==1] <- 5
table(df$m.rona)

tapply(df$m.rona, df$wave, mean, na.rm=T)
tapply(df$m.rona, df$wave, sd, na.rm=T)

#diff in means test for Gen Discrim
t.test(subset(df, wave == 1)$gendiscrim, subset(df, wave == 3)$gendiscrim) 

#diff in means test for Covid Discrim
t.test(subset(df, wave == 2)$apa.discrim.rona, subset(df, wave == 3)$apa.discrim.rona) 

## Table 2
dv <- subset(df, wave == 1)$gendiscrim

covat2 <- df %>%
  filter(wave == 2) %>%
  select(apa.discrim.rona, usborn, edu, DEM, GOP, age, male, chinese, indian)

covat3 <- df %>%
  filter(wave == 3) %>%
  select(apa.discrim.rona, usborn, edu, DEM, GOP, age, male, chinese, indian)

covat2$gendiscrim <- dv
covat3$gendiscrim <- dv

# factorize dummy variables
covat2 <- factorize_dummy(covat2)
covat3 <- factorize_dummy(covat3)

mods_exo <- list(
  
  "2nd wave" = lm(apa.discrim.rona ~ gendiscrim + usborn + edu + DEM + GOP + age + male + chinese + indian, data = covat2),
  
  "3rd wave" = lm(apa.discrim.rona ~ gendiscrim + usborn + edu + DEM + GOP + age + male + chinese + indian, data = covat3)
  
)

modelsummary(mods_exo, output = "gt")

modelsummary(mods_exo,
             fmt = 3,
             coef_omit = "Intercept",
             statistic = c("p = {p.value}"),
             #stars = TRUE,
             output = here("table2.docx"),
             coef_rename = 
               c( "gendiscrim" = "W1 Gen discrim",
                  "apa.discrim.rona" = "COVID-19 discrim",
                  "usborn1" = "US born",
                  "edu" = "Education",
                  "age" = "Age",
                  "male1" = "Male",
                  "chinese1" = "Chinese",
                  "indian1" = "Indian",
                  "DEM1" = "Democratic Party",
                  "GOP1" = "Republican Party"),
             vcov = "robust")



## Figure 4
# create a proxy variable
df$proxy <- rep(subset(df, wave == 1)$gendiscrim, 3)

test_that("Proxy setting done correctly", {
    expect_equal(table(df$proxy)[5] %>% as.numeric(), 186)
})

df$prior <- NA 
df$prior[df$proxy == 0.5] <- "Moderate"
df$prior[df$proxy < 0.5] <- "Low"
df$prior[df$proxy > 0.5] <- "High"

# factorize

df <- df %>%
    mutate(prior = factor(prior, levels = c("Low", "Moderate", "High")))

## covariates
df$wave <- factor(df$wave)

table(df$wave_fac)

df$wave_time<- NA
df$wave_time[df$wave_fac=="February"] <- "Wave 1"
df$wave_time[df$wave_fac=="May"] <- "Wave 2"
df$wave_time[df$wave_fac=="November"] <- "Wave 3"

## Figure 4
cor_plot <- df %>%
    filter(wave_time %in% c("Wave 2", "Wave 3")) %>%
    group_by(prior, wave_time) %>%
    dplyr::summarize("COVID-19 discrimination" = cor(biden, apa.discrim.rona, use = "complete.obs"),
                     "Democratic Party ID" = cor(biden, DEM, use = "complete.obs")) %>%
    pivot_longer(cols = matches("COVID|Democratic"),
                 names_to = "variables",
                 values_to = "corr_coefs") %>%
    ggplot(aes(x = glue("{factor(wave_time)}"), y = corr_coefs, fill = variables)) +
    geom_col(position = "dodge2") +
    scale_y_continuous(limits=c(0,0.75)) + scale_fill_grey() +
    geom_text(position = position_dodge(width = 0.9), vjust = -0.5, aes(label = round(corr_coefs, digits = 2))) +
    facet_wrap(~prior) +
    labs(y = "Correlation coefficient (r) w/ Biden Vote Choice", x = "",
         fill = "Correlation variables") +
    theme(legend.position = "bottom")

cor_plot

ggsave(here("fig4.png"))

## Table 5 (Means)
df %>%
  filter(wave == 1) %>%
  group_by(prior) %>%
  count()

datasummary(
  (`Income` = income) + 
    (`Education` = edu) + 
    (`Male` = male) + 
    (`Chinese` = chinese) + 
    (`Filipino` = filipino) + 
    (`Indian` = indian) + 
    (`Japanese` = japanese) + 
    (`Korean` = korean) + 
    (`Vietnamese` = vietnamese) + 
    (`Democrat` = DEM) + 
    (`Republican` = GOP) ~ prior * (Mean + SD),
  data = df %>%
    filter(wave == 1) %>%
    select(income, edu, usborn, male, DEM, GOP, 
           chinese, indian, 
           filipino, vietnamese,
           korean, japanese,  
           prior),
  fmt = 2, 
  sparse_header = TRUE,
  note = "Only the first wave respondents were included",
  output = here("table5.docx"))

